library(readr)
# Load the telecom data set. The path is absolute and specific to the
# original author's Windows machine, so it has to be adjusted elsewhere.
tele <- readr::read_csv(
  file = "C:\\Users\\Rober\\OneDrive\\Documents\\Uni Stuff\\NOTEBOOKS\\Class\\telecom.csv"
)
-- Column specification ---------------------------------------------------------------------------------------------------------------------------------------------------------------
cols(
.default = col_character(),
SeniorCitizen = col_double(),
tenure = col_double(),
MonthlyCharges = col_double(),
TotalCharges = col_double()
)
i Use `spec()` for the full column specifications.
# Open the spreadsheet-style viewer only when a user is attached: View()
# needs a GUI, so it is skipped when the script runs non-interactively.
if (interactive()) {
  View(tele)
}
library("tidyverse")
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages ---------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
v ggplot2 3.3.3 v dplyr 1.0.4
v tibble 3.0.6 v stringr 1.4.0
v tidyr 1.1.2 v forcats 0.5.1
v purrr 0.3.4
-- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library("ggplot2")
library("magrittr")
Attaching package: 'magrittr'
The following object is masked from 'package:purrr':
set_names
The following object is masked from 'package:tidyr':
extract
library("dplyr")
library("data.table")
data.table 1.13.6 using 2 threads (see ?getDTthreads). Latest news: r-datatable.com
Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':
between, first, last
The following object is masked from 'package:purrr':
transpose
library("mlr3verse")
library("paradox")
library("mlr3tuning")
Loading required package: mlr3
library("skimr")
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
# Print a per-column summary of the data (missing counts, ranges, quantiles).
skimr::skim(data = tele)
-- Data Summary ------------------------
Values
Name tele
Number of rows 5986
Number of columns 20
_______________________
Column type frequency:
character 16
numeric 4
________________________
Group variables None
-- Variable type: character -----------------------------------------------------------------------------------------------------------------------------------------------------------
# A tibble: 16 x 8
skim_variable n_missing complete_rate min max empty n_unique whitespace
* <chr> <int> <dbl> <int> <int> <int> <int> <int>
1 gender 0 1 4 6 0 2 0
2 Partner 0 1 2 3 0 2 0
3 Dependents 0 1 2 3 0 2 0
4 PhoneService 0 1 2 3 0 2 0
5 MultipleLines 0 1 2 16 0 3 0
6 InternetService 0 1 2 11 0 3 0
7 OnlineSecurity 0 1 2 19 0 3 0
8 OnlineBackup 0 1 2 19 0 3 0
9 DeviceProtection 0 1 2 19 0 3 0
10 TechSupport 0 1 2 19 0 3 0
11 StreamingTV 0 1 2 19 0 3 0
12 StreamingMovies 0 1 2 19 0 3 0
13 Contract 0 1 8 14 0 3 0
14 PaperlessBilling 0 1 2 3 0 2 0
15 PaymentMethod 0 1 12 25 0 4 0
16 Churn 0 1 2 3 0 2 0
-- Variable type: numeric -------------------------------------------------------------------------------------------------------------------------------------------------------------
# A tibble: 4 x 11
skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
* <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 SeniorCitizen 0 1 0.161 0.368 0 0 0 0 1 ▇▁▁▁▂
2 tenure 0 1 32.5 24.5 0 9 29 56 72 ▇▃▃▃▆
3 MonthlyCharges 0 1 64.8 30.1 18.2 35.6 70.4 89.9 119. ▇▅▆▇▅
4 TotalCharges 10 0.998 2298. 2274. 18.8 404. 1412. 3847. 8685. ▇▂▂▂▁
# Data exploration: print every value of the MonthlyCharges column.
tele$MonthlyCharges
[1] 24.10 88.15 74.95 55.90 53.45 49.85 90.65 24.90 35.55 116.50 68.75 51.20 99.00 54.90 109.55 106.80 74.30 25.60 94.20 46.35 25.60 107.60 19.55 96.20 69.65
[26] 78.50 45.10 25.75 104.10 104.75 81.35 96.35 93.55 19.70 69.10 35.65 99.40 54.40 19.30 83.80 68.95 62.85 110.45 45.30 75.65 19.85 55.30 24.75 54.75 94.00
[51] 110.30 18.80 100.80 60.00 66.05 79.75 94.80 25.45 111.15 55.25 59.50 25.35 98.55 61.20 60.90 74.55 19.65 48.75 19.15 84.70 34.25 80.70 19.45 19.70 83.20
[76] 19.85 19.15 54.85 75.05 25.10 84.35 89.75 48.40 19.90 51.75 92.40 45.25 94.80 69.85 20.10 74.90 109.90 78.00 91.40 74.95 50.00 20.90 49.80 56.25 30.40
[101] 19.10 59.90 20.00 79.60 19.45 74.85 73.85 20.20 71.10 109.20 62.65 59.45 45.05 60.35 114.95 114.60 50.15 86.65 65.70 19.90 19.85 59.75 45.00 71.40 101.00
[126] 75.40 118.75 19.45 20.40 84.50 83.80 20.35 109.30 89.40 76.50 24.60 105.05 83.65 104.45 106.85 105.10 46.30 76.80 81.35 99.15 20.35 89.85 84.40 19.75 64.45
[151] 68.95 24.40 29.70 58.75 75.40 25.00 114.30 19.65 84.25 74.60 46.00 38.85 19.65 34.60 99.10 50.15 59.45 19.50 112.20 69.95 23.85 54.30 20.20 55.90 19.15
[176] 90.15 44.90 100.50 91.15 70.15 90.00 108.65 49.45 79.20 60.70 79.85 97.20 95.60 89.30 85.25 89.45 94.70 75.35 70.85 29.05 20.60 25.40 101.30 87.25 116.55
[201] 107.70 102.70 115.15 76.10 60.40 64.35 19.60 49.35 24.40 20.50 110.55 99.20 84.95 94.65 39.10 84.45 19.70 19.90 18.90 19.55 85.65 54.55 19.85 86.40 34.65
[226] 24.85 78.10 105.65 78.35 85.95 19.35 71.65 91.15 56.25 95.80 78.45 74.40 69.00 106.10 56.70 102.95 91.40 110.85 21.05 93.80 111.10 55.05 33.15 18.75 70.55
[251] 74.45 65.15 19.85 24.80 20.05 80.55 20.35 89.60 45.05 49.25 50.55 72.25 118.65 66.50 19.45 49.80 69.85 107.20 93.35 20.00 24.50 56.05 80.85 70.50 107.50
[276] 59.85 60.85 100.20 19.65 64.00 90.60 25.25 20.25 93.45 100.50 95.65 43.95 19.25 73.80 104.40 79.25 81.55 93.80 24.30 70.45 25.60 99.00 69.95 76.00 24.90
[301] 64.80 103.05 20.10 19.90 110.65 109.05 19.20 66.05 45.30 84.40 45.00 99.80 106.00 20.65 71.50 83.55 98.70 49.50 48.70 19.20 82.30 21.10 69.00 19.55 75.10
[326] 54.60 70.20 24.75 69.85 79.20 69.65 84.10 65.35 25.75 100.30 89.15 44.85 74.45 86.40 19.70 60.55 25.20 79.40 25.00 73.65 55.60 113.25 68.30 110.05 19.90
[351] 43.95 31.35 64.85 97.20 23.45 70.20 69.55 18.95 19.80 39.65 19.95 69.10 70.05 19.80 105.75 41.90 53.10 105.10 80.70 24.65 53.15 54.65 109.70 90.60 73.00
[376] 54.75 20.50 92.10 75.30 106.75 91.30 80.00 91.30 47.85 50.15 74.45 80.60 94.40 75.25 107.15 116.25 19.55 20.10 20.20 74.80 76.20 55.70 75.30 45.70 54.35
[401] 94.55 77.50 98.15 111.10 58.60 94.55 116.30 89.30 20.70 19.60 99.35 49.80 54.40 54.25 35.50 84.50 59.30 20.65 55.20 25.00 24.30 70.80 78.80 83.30 61.15
[426] 45.15 96.00 35.10 20.25 49.25 74.00 20.25 105.00 105.30 19.60 74.30 89.65 19.35 81.80 95.75 40.20 90.95 108.20 85.80 76.25 19.70 94.65 104.40 103.30 80.60
[451] 19.30 104.30 60.75 19.65 19.85 56.15 19.95 44.80 83.55 100.80 60.30 96.25 44.60 72.75 25.30 79.70 110.10 95.10 80.50 107.40 99.30 86.60 52.20 88.35 40.05
[476] 90.85 75.25 69.55 79.90 85.35 114.35 24.60 19.95 79.85 59.70 19.45 107.55 70.35 20.85 89.60 19.65 83.60 84.25 111.45 74.95 97.35 69.35 84.60 98.40 51.05
[501] 95.70 94.70 69.05 64.45 19.85 19.70 95.15 72.45 89.40 20.20 70.65 73.55 19.95 20.30 64.95 19.35 20.85 76.15 99.65 53.85 92.35 96.40 114.20 81.50 45.00
[526] 104.95 90.35 110.50 105.00 55.50 59.10 56.15
# 2/3)
# Base model analysis: benchmark untuned learners on one shared holdout split.

# Convert every character column (the target Churn included) to a factor.
# mutate(across(where(...))) replaces the superseded mutate_if() + sapply().
tele <- tele %>% mutate(across(where(is.character), as.factor))
# tele$Churn <- factor(tele$Churn, levels = c(0, 1))

# Candidate learners, all predicting class probabilities. glmnet and xgboost
# are preceded by an "encode" pipe operator; ranger is wrapped in a PipeOp.
lrn_cart <- lrn("classif.rpart", predict_type = "prob")
lrn_glm <- lrn("classif.glmnet", predict_type = "prob", alpha = 1)
pl_glm <- po("encode") %>>% po(lrn_glm)
lrn_feat <- lrn("classif.featureless", predict_type = "prob")
# lrn_lda <- lrn("classif.lda", predict_type = "prob")
# pl_lda <- po(lrn_lda)
lrn_ranger <- lrn("classif.ranger", predict_type = "prob")
pl_ranger <- po(lrn_ranger)
lrn_xgboost <- lrn("classif.xgboost", predict_type = "prob", eval_metric = "error")
pl_xgb <- po("encode") %>>% po(lrn_xgboost)

# Classification task on Churn with "Yes" as the positive class.
credit_task <- TaskClassif$new(
  id = "telee",
  backend = tele, # rows with missing values are kept (no na.omit() here)
  target = "Churn",
  positive = "Yes"
)

# Fix the RNG so the random holdout split (and any randomised learner) gives
# the same result on every run.
set.seed(2021)

# NOTE: despite its name, cv5 is a single holdout split, not 5-fold CV.
cv5 <- rsmp("holdout")
cv5$instantiate(credit_task)

# Run every learner on the same instantiated split and keep the fitted models.
res <- benchmark(
  data.table(
    task = list(credit_task),
    learner = list(
      lrn_cart,
      pl_glm,
      lrn_feat,
      pl_ranger,
      pl_xgb
    ),
    resampling = list(cv5)
  ),
  store_models = TRUE
)
INFO [20:57:06.281] [mlr3] Running benchmark with 5 resampling iterations
INFO [20:57:07.329] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:57:09.129] [mlr3] Applying learner 'classif.featureless' on task 'telee' (iter 1/1)
INFO [20:57:09.149] [mlr3] Applying learner 'encode.classif.xgboost' on task 'telee' (iter 1/1)
INFO [20:57:09.903] [mlr3] Applying learner 'encode.classif.glmnet' on task 'telee' (iter 1/1)
INFO [20:57:11.388] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [20:57:11.529] [mlr3] Finished benchmark
# Score each benchmarked learner: classification error, false-positive rate
# and false-negative rate.
res$aggregate(
  lapply(c("classif.ce", "classif.fpr", "classif.fnr"), msr)
)
NA
NA
# 2/3)
# Test the hyper-parameters found through tuning (the tuning code is further
# down the file) against the default learners.
# Author's note: only xgboost was tuned, as glmnet has a cv_glmnet model that
# tunes the regularisation parameter for us.
# NOTE(review): the learners compared in this cell are rpart and ranger.

# Default and tuned decision trees (tuned cp = 0.013).
lrn_cart <- lrn("classif.rpart", predict_type = "prob")
lrn_rcart <- lrn("classif.rpart", predict_type = "prob", cp = 0.013)

# Default and tuned random forests, each wrapped in a PipeOp.
lrn_ranger <- lrn("classif.ranger", predict_type = "prob")
lrn_rranger <- lrn(
  "classif.ranger",
  predict_type = "prob",
  num.trees = 248,
  max.depth = 16
)
pl_ranger <- po(lrn_ranger)
pl_rranger <- po(lrn_rranger)

credit_task <- TaskClassif$new(
  id = "telee",
  backend = tele, # rows with missing values are kept (no na.omit() here)
  target = "Churn",
  positive = "Yes"
)

# NOTE: despite its name, cv5 is a single holdout split.
cv5 <- rsmp("holdout")
cv5$instantiate(credit_task)

# All four learners are evaluated on the same instantiated split.
res <- benchmark(
  design = data.table(
    task = list(credit_task),
    learner = list(
      lrn_cart,
      lrn_rcart,
      pl_ranger,
      pl_rranger
    ),
    resampling = list(cv5)
  ),
  store_models = TRUE
)
INFO [20:57:16.997] [mlr3] Running benchmark with 4 resampling iterations
INFO [20:57:17.035] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:57:18.063] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:57:20.070] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [20:57:20.144] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [20:57:20.231] [mlr3] Finished benchmark
# Score each learner: error and false-positive/negative rates, followed by
# the raw confusion-matrix counts (FN, FP, TP, TN).
res$aggregate(
  lapply(
    c(
      "classif.ce",
      "classif.fpr",
      "classif.fnr",
      "classif.fn",
      "classif.fp",
      "classif.tp",
      "classif.tn"
    ),
    msr
  )
)
NA
NA
# Next 4 cells are the ROC curves
library(mlr3viz)
library(precrec)

# Benchmark the four rpart/ranger learners on the churn task with bootstrap
# resampling.
tasks <- credit_task
learner <- list(lrn_cart, lrn_rcart, lrn_ranger, lrn_rranger)
resampling <- rsmp("bootstrap")

# Author's to-do list:
# - try bootstrap on the rest
# - check I am using all the columns in the data
# - try larger grid values
# - try changing the fp trade-off values
# - look at the benchmarking bookmark
object <- benchmark(
  benchmark_grid(
    tasks = tasks,
    learners = learner,
    resamplings = resampling
  )
)
INFO [20:57:25.181] [mlr3] Running benchmark with 120 resampling iterations
INFO [20:57:25.216] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/30)
INFO [20:57:27.918] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 17/30)
INFO [20:57:30.789] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 19/30)
INFO [20:57:30.905] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 23/30)
INFO [20:57:32.321] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/30)
INFO [20:57:32.391] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 2/30)
INFO [20:57:33.518] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 24/30)
INFO [20:57:33.595] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 4/30)
INFO [20:57:36.487] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 22/30)
INFO [20:57:37.599] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 10/30)
INFO [20:57:40.325] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 16/30)
INFO [20:57:42.818] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 11/30)
INFO [20:57:44.031] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 23/30)
INFO [20:57:44.165] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 9/30)
INFO [20:57:45.565] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 14/30)
INFO [20:57:45.639] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 27/30)
INFO [20:57:45.713] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 25/30)
INFO [20:57:45.788] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 29/30)
INFO [20:57:46.785] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 22/30)
INFO [20:57:46.856] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/30)
INFO [20:57:46.929] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 27/30)
INFO [20:57:47.002] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 13/30)
INFO [20:57:48.462] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 10/30)
INFO [20:57:48.580] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 25/30)
INFO [20:57:50.403] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 13/30)
INFO [20:57:51.632] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 30/30)
INFO [20:57:51.745] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 11/30)
INFO [20:57:53.645] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 26/30)
INFO [20:57:53.716] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 17/30)
INFO [20:57:53.812] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 29/30)
INFO [20:57:56.193] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 5/30)
INFO [20:57:58.494] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 9/30)
INFO [20:57:58.577] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 5/30)
INFO [20:57:59.471] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 18/30)
INFO [20:57:59.542] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 4/30)
INFO [20:58:00.479] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 25/30)
INFO [20:58:01.844] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 2/30)
INFO [20:58:01.918] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 14/30)
INFO [20:58:01.985] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 24/30)
INFO [20:58:03.947] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 20/30)
INFO [20:58:05.311] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 24/30)
INFO [20:58:06.230] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 7/30)
INFO [20:58:06.301] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 27/30)
INFO [20:58:07.254] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 28/30)
INFO [20:58:09.534] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 27/30)
INFO [20:58:11.829] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 20/30)
INFO [20:58:13.661] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 11/30)
INFO [20:58:13.778] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 12/30)
INFO [20:58:13.896] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 3/30)
INFO [20:58:16.180] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 12/30)
INFO [20:58:16.251] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 3/30)
INFO [20:58:17.186] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 13/30)
INFO [20:58:17.294] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 28/30)
INFO [20:58:18.574] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 8/30)
INFO [20:58:19.460] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 29/30)
INFO [20:58:19.531] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 6/30)
INFO [20:58:20.462] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 8/30)
INFO [20:58:23.101] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 9/30)
INFO [20:58:23.179] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 18/30)
INFO [20:58:24.561] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 5/30)
INFO [20:58:24.693] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 6/30)
INFO [20:58:24.888] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 2/30)
INFO [20:58:27.103] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 6/30)
INFO [20:58:29.465] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 18/30)
INFO [20:58:31.852] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 21/30)
INFO [20:58:32.749] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 15/30)
INFO [20:58:35.073] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 6/30)
INFO [20:58:35.145] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/30)
INFO [20:58:36.105] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 15/30)
INFO [20:58:36.178] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 11/30)
INFO [20:58:36.250] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 23/30)
INFO [20:58:38.753] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 14/30)
INFO [20:58:39.649] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 19/30)
INFO [20:58:42.146] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 20/30)
INFO [20:58:42.215] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 26/30)
INFO [20:58:44.366] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 16/30)
INFO [20:58:44.477] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 15/30)
INFO [20:58:45.453] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 17/30)
INFO [20:58:45.524] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 10/30)
INFO [20:58:46.391] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 20/30)
INFO [20:58:46.462] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 25/30)
INFO [20:58:46.535] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 23/30)
INFO [20:58:46.649] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 7/30)
INFO [20:58:46.762] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 8/30)
INFO [20:58:46.907] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 30/30)
INFO [20:58:47.015] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 30/30)
INFO [20:58:48.258] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 4/30)
INFO [20:58:48.328] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 21/30)
INFO [20:58:50.296] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 10/30)
INFO [20:58:50.407] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 26/30)
INFO [20:58:51.657] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 22/30)
INFO [20:58:53.601] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 2/30)
INFO [20:58:53.710] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 22/30)
INFO [20:58:53.849] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 7/30)
INFO [20:58:55.944] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 5/30)
INFO [20:58:56.016] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 8/30)
INFO [20:58:56.087] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 28/30)
INFO [20:58:56.162] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 21/30)
INFO [20:58:56.236] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 18/30)
INFO [20:58:56.310] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 13/30)
INFO [20:58:58.781] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 26/30)
INFO [20:58:58.849] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 29/30)
INFO [20:58:58.923] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 16/30)
INFO [20:58:59.011] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 4/30)
INFO [20:58:59.082] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 14/30)
INFO [20:59:01.384] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 16/30)
INFO [20:59:02.258] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 12/30)
INFO [20:59:03.170] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 15/30)
INFO [20:59:03.280] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 3/30)
INFO [20:59:03.388] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 21/30)
INFO [20:59:03.499] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 19/30)
INFO [20:59:03.609] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 7/30)
INFO [20:59:04.889] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 9/30)
INFO [20:59:06.911] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 12/30)
INFO [20:59:09.095] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 17/30)
INFO [20:59:10.053] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 28/30)
INFO [20:59:10.162] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 19/30)
INFO [20:59:11.441] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 3/30)
INFO [20:59:11.514] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 30/30)
INFO [20:59:13.496] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 24/30)
INFO [20:59:13.640] [mlr3] Finished benchmark
# Show the first rows of the plotting data derived from the benchmark result,
# then draw the default benchmark plot.
object %>%
  fortify() %>%
  head()
autoplot(object)

# Next 4 cells are the ROC curves
library(mlr3viz)
library(precrec)

# ROC cell 1: rpart with default settings, evaluated with the resampling
# currently stored in cv5.
tasks <- credit_task
learner <- list(lrn("classif.rpart", predict_type = "prob"))
resampling <- cv5
object <- benchmark(
  benchmark_grid(
    tasks = tasks,
    learners = learner,
    resamplings = resampling
  )
)
INFO [20:59:22.954] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:22.997] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [20:59:23.075] [mlr3] Finished benchmark
# Show the first rows of the plotting data, then the default benchmark plot.
object %>%
  fortify() %>%
  head()
autoplot(object)

# ROC curve, drawn from a deep copy of the benchmark result.
autoplot(object$clone(deep = TRUE), type = "roc")

library(mlr3viz)
library(precrec)

# ROC cell 2: rpart with cp = 0.013, evaluated with the resampling currently
# stored in cv5.
tasks <- credit_task
learner <- lrn("classif.rpart", predict_type = "prob", cp = 0.013)
resampling <- cv5
object <- benchmark(
  benchmark_grid(
    tasks = tasks,
    learners = learner,
    resamplings = resampling
  )
)
INFO [20:59:24.706] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:24.742] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [20:59:24.824] [mlr3] Finished benchmark
# Show the first rows of the plotting data, then the default benchmark plot.
object %>%
  fortify() %>%
  head()
autoplot(object)

# ROC curve, drawn from a deep copy of the benchmark result.
autoplot(object$clone(deep = TRUE), type = "roc")

library(mlr3viz)
library(precrec)

# ROC cell 3: ranger with default settings, evaluated with the resampling
# currently stored in cv5.
tasks <- credit_task
learner <- lrn("classif.ranger", predict_type = "prob")
resampling <- cv5
object <- benchmark(
  benchmark_grid(
    tasks = tasks,
    learners = learner,
    resamplings = resampling
  )
)
INFO [20:59:26.205] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:26.241] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:59:27.884] [mlr3] Finished benchmark
# Show the first rows of the plotting data, then the default benchmark plot.
object %>%
  fortify() %>%
  head()
autoplot(object)

# ROC curve, drawn from a deep copy of the benchmark result.
autoplot(object$clone(deep = TRUE), type = "roc")

library(mlr3viz)
library(precrec)

# ROC cell 4: ranger with num.trees = 248 and max.depth = 16, evaluated with
# the resampling currently stored in cv5.
tasks <- credit_task
learner <- lrn(
  "classif.ranger",
  predict_type = "prob",
  num.trees = 248,
  max.depth = 16
)
resampling <- cv5
object <- benchmark(
  benchmark_grid(
    tasks = tasks,
    learners = learner,
    resamplings = resampling
  )
)
INFO [20:59:29.527] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:29.562] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:59:30.363] [mlr3] Finished benchmark
# Show the first rows of the plotting data, then the default benchmark plot.
object %>%
  fortify() %>%
  head()
autoplot(object)

# ROC curve, drawn from a deep copy of the benchmark result.
autoplot(object$clone(deep = TRUE), type = "roc")

# Plot of cost penalty for tree: fit rpart (xval = 10) on bootstrap resamples
# of the churn task, keeping every fitted model.
lrn_cart_cv <- lrn("classif.rpart", predict_type = "prob", xval = 10)

# NOTE: cv5 is rebound here and now holds a bootstrap resampling.
cv5 <- rsmp("bootstrap")
res_cart_cv <- resample(
  task = credit_task,
  learner = lrn_cart_cv,
  resampling = cv5,
  store_models = TRUE
)
INFO [20:59:31.944] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 11/30)
INFO [20:59:32.237] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 26/30)
INFO [20:59:32.625] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 20/30)
INFO [20:59:33.055] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 7/30)
INFO [20:59:33.453] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 19/30)
INFO [20:59:33.698] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 6/30)
INFO [20:59:33.945] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 8/30)
INFO [20:59:34.217] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 15/30)
INFO [20:59:34.473] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 13/30)
INFO [20:59:34.810] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 30/30)
INFO [20:59:35.194] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 14/30)
INFO [20:59:35.548] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 27/30)
INFO [20:59:35.919] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 12/30)
INFO [20:59:36.300] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 21/30)
INFO [20:59:36.656] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 24/30)
INFO [20:59:36.943] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 16/30)
INFO [20:59:37.194] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 25/30)
INFO [20:59:37.443] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 22/30)
INFO [20:59:37.691] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 17/30)
INFO [20:59:37.945] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 18/30)
INFO [20:59:38.346] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 28/30)
INFO [20:59:38.723] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 4/30)
INFO [20:59:39.078] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 29/30)
INFO [20:59:39.447] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 10/30)
INFO [20:59:39.818] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 9/30)
INFO [20:59:40.172] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/30)
INFO [20:59:40.433] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 3/30)
INFO [20:59:40.682] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 23/30)
INFO [20:59:40.936] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 5/30)
INFO [20:59:41.184] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 2/30)
# Draw rpart's cp plot for the fitted model of the 10th learner stored in the
# resample result.
rpart::plotcp(res_cart_cv$learners[[10]]$model)

# Print the parameter set of the ranger learner to see what can be tuned.
lrn_ranger$param_set
<ParamSet>
# Tuning no. of trees & max depth for ranger.
learner <- lrn("classif.ranger", predict_type = "prob")

# Integer ranges searched for the two hyper-parameters.
search_space <- ps(
  num.trees = p_int(lower = 200, upper = 500),
  max.depth = p_int(lower = 2, upper = 30)
)

# Each configuration is scored by accuracy on a holdout split.
hout <- rsmp("holdout")
measure <- msr("classif.acc")

# NOTE: despite the name evals20, tuning stops after 10 evaluations.
evals20 <- trm("evals", n_evals = 10)

task <- TaskClassif$new(
  id = "telee",
  backend = na.omit(tele), # rows containing missing values are dropped here
  target = "Churn",
  positive = "Yes"
)

instance <- TuningInstanceSingleCrit$new(
  task = task,
  learner = learner,
  resampling = hout,
  measure = measure,
  search_space = search_space,
  terminator = evals20
)

# Print the (not yet optimised) tuning instance.
instance
<TuningInstanceSingleCrit>
* State: Not optimized
* Objective: <ObjectiveTuning:classif.ranger_on_telee>
* Search Space:
<ParamSet>
* Terminator: <TerminatorEvals>
* Terminated: FALSE
* Archive:
<ArchiveTuning>
# Grid search with resolution 250 over the ranger search space.
# NOTE: the terminator attached to the instance stops the run after 10
# evaluations, so only a small part of this grid is ever evaluated.
tuner <- tnr("grid_search", resolution = 250)
tuner$optimize(inst = instance)
INFO [20:59:44.194] [bbotk] Starting to optimize 2 parameter(s) with '<OptimizerGridSearch>' and '<TerminatorEvals> [n_evals=10]'
INFO [20:59:44.240] [bbotk] Evaluating 1 configuration(s)
INFO [20:59:44.387] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:44.425] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:59:45.423] [mlr3] Finished benchmark
INFO [20:59:45.552] [bbotk] Result of batch 1:
INFO [20:59:45.618] [bbotk]
INFO [20:59:45.652] [bbotk] Evaluating 1 configuration(s)
INFO [20:59:45.749] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:45.784] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:59:46.915] [mlr3] Finished benchmark
INFO [20:59:47.075] [bbotk] Result of batch 2:
INFO [20:59:47.125] [bbotk]
INFO [20:59:47.150] [bbotk] Evaluating 1 configuration(s)
INFO [20:59:47.246] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:47.286] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:59:48.300] [mlr3] Finished benchmark
INFO [20:59:48.446] [bbotk] Result of batch 3:
INFO [20:59:48.509] [bbotk]
INFO [20:59:48.535] [bbotk] Evaluating 1 configuration(s)
INFO [20:59:48.629] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:48.677] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:59:50.205] [mlr3] Finished benchmark
INFO [20:59:50.374] [bbotk] Result of batch 4:
INFO [20:59:50.421] [bbotk]
INFO [20:59:50.449] [bbotk] Evaluating 1 configuration(s)
INFO [20:59:50.562] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:50.601] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:59:51.991] [mlr3] Finished benchmark
INFO [20:59:52.143] [bbotk] Result of batch 5:
INFO [20:59:52.188] [bbotk]
INFO [20:59:52.216] [bbotk] Evaluating 1 configuration(s)
INFO [20:59:52.313] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:52.352] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:59:53.145] [mlr3] Finished benchmark
INFO [20:59:53.321] [bbotk] Result of batch 6:
INFO [20:59:53.373] [bbotk]
INFO [20:59:53.401] [bbotk] Evaluating 1 configuration(s)
INFO [20:59:53.512] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:53.547] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:59:54.879] [mlr3] Finished benchmark
INFO [20:59:55.066] [bbotk] Result of batch 7:
INFO [20:59:55.117] [bbotk]
INFO [20:59:55.144] [bbotk] Evaluating 1 configuration(s)
INFO [20:59:55.250] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:55.289] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:59:56.786] [mlr3] Finished benchmark
INFO [20:59:56.936] [bbotk] Result of batch 8:
INFO [20:59:56.984] [bbotk]
INFO [20:59:57.010] [bbotk] Evaluating 1 configuration(s)
INFO [20:59:57.110] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:57.151] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:59:57.766] [mlr3] Finished benchmark
INFO [20:59:57.935] [bbotk] Result of batch 9:
INFO [20:59:57.986] [bbotk]
INFO [20:59:58.012] [bbotk] Evaluating 1 configuration(s)
INFO [20:59:58.139] [mlr3] Running benchmark with 1 resampling iterations
INFO [20:59:58.173] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [20:59:59.770] [mlr3] Finished benchmark
INFO [20:59:59.942] [bbotk] Result of batch 10:
INFO [20:59:59.990] [bbotk]
INFO [21:00:00.040] [bbotk] Finished optimizing after 10 evaluation(s)
INFO [21:00:00.065] [bbotk] Result:
INFO [21:00:00.128] [bbotk]
# Print the parameter set of the rpart learner to see what can be tuned.
lrn_cart$param_set
<ParamSet>
# Tuning the tree, i.e. the penalty cost (cp) of rpart.
learner <- lrn("classif.rpart", predict_type = "prob")

# Range searched for the complexity parameter.
search_space <- ps(
  cp = p_dbl(lower = 0.0001, upper = 0.1)
)
# tele = tele %>% mutate_if(sapply(tele, is.character), as.factor)

# Each configuration is scored by accuracy on a holdout split.
hout <- rsmp("holdout")
measure <- msr("classif.acc")

# NOTE: despite the name evals20, tuning stops after 10 evaluations.
evals20 <- trm("evals", n_evals = 10)

task <- TaskClassif$new(
  id = "telee",
  backend = tele, # rows with missing values are kept (no na.omit() here)
  target = "Churn",
  positive = "Yes"
)

instance <- TuningInstanceSingleCrit$new(
  task = task,
  learner = learner,
  resampling = hout,
  measure = measure,
  search_space = search_space,
  terminator = evals20
)

# Print the (not yet optimised) tuning instance.
instance
<TuningInstanceSingleCrit>
* State: Not optimized
* Objective: <ObjectiveTuning:classif.rpart_on_telee>
* Search Space:
<ParamSet>
* Terminator: <TerminatorEvals>
* Terminated: FALSE
* Archive:
<ArchiveTuning>
tuner = tnr("grid_search", resolution = 250)
tuner$optimize(instance)
INFO [21:00:01.557] [bbotk] Starting to optimize 1 parameter(s) with '<OptimizerGridSearch>' and '<TerminatorEvals> [n_evals=10]'
INFO [21:00:01.583] [bbotk] Evaluating 1 configuration(s)
INFO [21:00:01.655] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:01.692] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [21:00:01.763] [mlr3] Finished benchmark
INFO [21:00:01.892] [bbotk] Result of batch 1:
INFO [21:00:01.949] [bbotk]
INFO [21:00:01.973] [bbotk] Evaluating 1 configuration(s)
INFO [21:00:02.058] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:02.098] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [21:00:02.168] [mlr3] Finished benchmark
INFO [21:00:02.286] [bbotk] Result of batch 2:
INFO [21:00:02.328] [bbotk]
INFO [21:00:02.351] [bbotk] Evaluating 1 configuration(s)
INFO [21:00:02.418] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:02.454] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [21:00:02.531] [mlr3] Finished benchmark
INFO [21:00:02.643] [bbotk] Result of batch 3:
INFO [21:00:02.689] [bbotk]
INFO [21:00:02.711] [bbotk] Evaluating 1 configuration(s)
INFO [21:00:02.779] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:02.815] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [21:00:02.900] [mlr3] Finished benchmark
INFO [21:00:03.016] [bbotk] Result of batch 4:
INFO [21:00:03.068] [bbotk]
INFO [21:00:03.094] [bbotk] Evaluating 1 configuration(s)
INFO [21:00:03.162] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:03.196] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [21:00:03.307] [mlr3] Finished benchmark
INFO [21:00:03.446] [bbotk] Result of batch 5:
INFO [21:00:03.494] [bbotk]
INFO [21:00:03.519] [bbotk] Evaluating 1 configuration(s)
INFO [21:00:03.585] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:03.624] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [21:00:03.709] [mlr3] Finished benchmark
INFO [21:00:03.823] [bbotk] Result of batch 6:
INFO [21:00:03.871] [bbotk]
INFO [21:00:03.900] [bbotk] Evaluating 1 configuration(s)
INFO [21:00:03.972] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:04.008] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [21:00:04.093] [mlr3] Finished benchmark
INFO [21:00:04.215] [bbotk] Result of batch 7:
INFO [21:00:04.263] [bbotk]
INFO [21:00:04.292] [bbotk] Evaluating 1 configuration(s)
INFO [21:00:04.364] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:04.402] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [21:00:04.490] [mlr3] Finished benchmark
INFO [21:00:04.606] [bbotk] Result of batch 8:
INFO [21:00:04.651] [bbotk]
INFO [21:00:04.676] [bbotk] Evaluating 1 configuration(s)
INFO [21:00:04.744] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:04.778] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [21:00:04.897] [mlr3] Finished benchmark
INFO [21:00:05.032] [bbotk] Result of batch 9:
INFO [21:00:05.078] [bbotk]
INFO [21:00:05.103] [bbotk] Evaluating 1 configuration(s)
INFO [21:00:05.230] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:05.270] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/1)
INFO [21:00:05.361] [mlr3] Finished benchmark
INFO [21:00:05.485] [bbotk] Result of batch 10:
INFO [21:00:05.530] [bbotk]
INFO [21:00:05.566] [bbotk] Finished optimizing after 10 evaluation(s)
INFO [21:00:05.590] [bbotk] Result:
INFO [21:00:05.635] [bbotk]
#optimising for the false negative rate (the tuned measure below is classif.fnr)
gr = lrn("classif.rpart", predict_type = "prob") %>>% po("threshold")
learner = GraphLearner$new(gr)
search_space = ps(
threshold.thresholds = p_dbl(lower = 0.36, upper = 0.64)
)
terminator = trm("evals", n_evals = 10)
tuner = tnr("grid_search")
at = AutoTuner$new(
learner = learner,
resampling = rsmp("holdout"),
measure = msr("classif.fnr"),
search_space = search_space,
terminator = terminator,
tuner = tuner
)
at
<AutoTuner:classif.rpart.threshold.tuned>
* Model: -
* Parameters: list()
* Packages: -
* Predict Type: response
* Feature types: logical, integer, numeric, character, factor, ordered, POSIXct
* Properties: featureless, importance, missings, multiclass, oob_error, selected_features, twoclass, weights
grid = benchmark_grid(
task = task,
learner = list(at, lrn("classif.rpart")),
resampling = rsmp("cv", folds = 3)
)
# avoid console output from mlr3tuning
logger = lgr::get_logger("bbotk")
logger$set_threshold("warn")
bmr = benchmark(grid)
INFO [21:00:07.140] [mlr3] Running benchmark with 6 resampling iterations
INFO [21:00:07.177] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/3)
INFO [21:00:07.252] [mlr3] Applying learner 'classif.rpart.threshold.tuned' on task 'telee' (iter 3/3)
INFO [21:00:07.583] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:07.603] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:07.843] [mlr3] Finished benchmark
INFO [21:00:08.134] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:08.154] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:08.365] [mlr3] Finished benchmark
INFO [21:00:08.608] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:08.626] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:08.832] [mlr3] Finished benchmark
INFO [21:00:09.094] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:09.114] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:09.327] [mlr3] Finished benchmark
INFO [21:00:09.577] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:09.617] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:09.847] [mlr3] Finished benchmark
INFO [21:00:10.227] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:10.242] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:10.497] [mlr3] Finished benchmark
INFO [21:00:10.939] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:10.966] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:11.304] [mlr3] Finished benchmark
INFO [21:00:11.671] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:11.696] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:12.074] [mlr3] Finished benchmark
INFO [21:00:12.434] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:12.455] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:12.752] [mlr3] Finished benchmark
INFO [21:00:13.119] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:13.135] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:13.327] [mlr3] Finished benchmark
INFO [21:00:13.882] [mlr3] Applying learner 'classif.rpart.threshold.tuned' on task 'telee' (iter 1/3)
INFO [21:00:14.345] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:14.367] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:14.667] [mlr3] Finished benchmark
INFO [21:00:15.148] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:15.173] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:15.474] [mlr3] Finished benchmark
INFO [21:00:15.854] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:15.875] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:16.207] [mlr3] Finished benchmark
INFO [21:00:16.496] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:16.511] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:16.708] [mlr3] Finished benchmark
INFO [21:00:16.951] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:16.978] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:17.277] [mlr3] Finished benchmark
INFO [21:00:17.638] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:17.666] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:17.976] [mlr3] Finished benchmark
INFO [21:00:18.381] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:18.404] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:18.703] [mlr3] Finished benchmark
INFO [21:00:19.055] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:19.082] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:19.391] [mlr3] Finished benchmark
INFO [21:00:19.737] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:19.752] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:19.942] [mlr3] Finished benchmark
INFO [21:00:20.220] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:20.248] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:20.515] [mlr3] Finished benchmark
INFO [21:00:21.251] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 2/3)
INFO [21:00:21.344] [mlr3] Applying learner 'classif.rpart.threshold.tuned' on task 'telee' (iter 2/3)
INFO [21:00:21.743] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:21.775] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:22.080] [mlr3] Finished benchmark
INFO [21:00:22.482] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:22.506] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:22.795] [mlr3] Finished benchmark
INFO [21:00:23.098] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:23.112] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:23.309] [mlr3] Finished benchmark
INFO [21:00:23.535] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:23.550] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:23.808] [mlr3] Finished benchmark
INFO [21:00:24.160] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:24.184] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:24.517] [mlr3] Finished benchmark
INFO [21:00:24.873] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:24.892] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:25.181] [mlr3] Finished benchmark
INFO [21:00:25.564] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:25.587] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:25.888] [mlr3] Finished benchmark
INFO [21:00:26.254] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:26.279] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:26.552] [mlr3] Finished benchmark
INFO [21:00:26.778] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:26.794] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:27.004] [mlr3] Finished benchmark
INFO [21:00:27.358] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:27.376] [mlr3] Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1)
INFO [21:00:27.661] [mlr3] Finished benchmark
INFO [21:00:28.454] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 3/3)
INFO [21:00:28.585] [mlr3] Finished benchmark
bmr$aggregate(msrs(c("classif.ce",
"classif.fpr",
"classif.fnr",
"classif.fn",
"classif.fp",
"classif.tp",
"classif.tn")))
NA
NA
NA
NA
#optimising for the false negative rate (the tuned measure below is classif.fnr)
task<-TaskClassif$new(id = "telee",
backend = na.omit(tele), # <- NB: no na.omit() this time
target = "Churn",
positive = "Yes")
gr = lrn("classif.ranger", predict_type = "prob") %>>% po("threshold")
learner = GraphLearner$new(gr)
search_space = ps(
threshold.thresholds = p_dbl(lower = 0.35, upper = 0.65)
)
terminator = trm("evals", n_evals = 10)
tuner = tnr("grid_search")
at = AutoTuner$new(
learner = learner,
resampling = rsmp("holdout"),
measure = msr("classif.fnr"),
search_space = search_space,
terminator = terminator,
tuner = tuner
)
at
<AutoTuner:classif.ranger.threshold.tuned>
* Model: -
* Parameters: list()
* Packages: -
* Predict Type: response
* Feature types: logical, integer, numeric, character, factor, ordered, POSIXct
* Properties: featureless, importance, missings, multiclass, oob_error, selected_features, twoclass, weights
grid = benchmark_grid(
task = task,
learner = list(at, lrn("classif.ranger")),
resampling = rsmp("holdout")
)
# avoid console output from mlr3tuning
logger = lgr::get_logger("bbotk")
logger$set_threshold("warn")
bmr = benchmark(grid)
INFO [21:00:31.593] [mlr3] Running benchmark with 2 resampling iterations
INFO [21:00:31.630] [mlr3] Applying learner 'classif.ranger' on task 'telee' (iter 1/1)
INFO [21:00:33.144] [mlr3] Applying learner 'classif.ranger.threshold.tuned' on task 'telee' (iter 1/1)
INFO [21:00:33.950] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:33.968] [mlr3] Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1)
INFO [21:00:35.351] [mlr3] Finished benchmark
INFO [21:00:36.052] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:36.083] [mlr3] Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1)
INFO [21:00:37.950] [mlr3] Finished benchmark
INFO [21:00:38.675] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:38.691] [mlr3] Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1)
INFO [21:00:40.171] [mlr3] Finished benchmark
INFO [21:00:40.781] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:40.806] [mlr3] Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1)
INFO [21:00:42.305] [mlr3] Finished benchmark
INFO [21:00:42.817] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:42.844] [mlr3] Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1)
INFO [21:00:44.464] [mlr3] Finished benchmark
INFO [21:00:45.047] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:45.074] [mlr3] Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1)
INFO [21:00:46.449] [mlr3] Finished benchmark
INFO [21:00:47.139] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:47.160] [mlr3] Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1)
INFO [21:00:48.892] [mlr3] Finished benchmark
INFO [21:00:49.319] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:49.346] [mlr3] Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1)
INFO [21:00:51.816] [mlr3] Finished benchmark
INFO [21:00:52.726] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:52.742] [mlr3] Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1)
INFO [21:00:54.385] [mlr3] Finished benchmark
INFO [21:00:55.094] [mlr3] Running benchmark with 1 resampling iterations
INFO [21:00:55.120] [mlr3] Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1)
INFO [21:00:56.581] [mlr3] Finished benchmark
INFO [21:00:59.824] [mlr3] Finished benchmark
bmr$aggregate(msrs(c("classif.ce",
"classif.fpr",
"classif.fnr",
"classif.fn",
"classif.fp",
"classif.tp",
"classif.tn")))
NA
NA
NA
lrn_cart <- lrn("classif.rpart", predict_type = "prob")
credit_task <- TaskClassif$new(id = "telee",
backend = tele, # <- NB: no na.omit() this time
target = "Churn", positive = "Yes")
cv5 <- rsmp("cv", folds = 10)
cv5$instantiate(credit_task)
# Now fit as normal ... we can just add it to our benchmark set
res <- benchmark(data.table(
task = list(credit_task),
learner = list(lrn_cart),
resampling = list(cv5)
), store_models = TRUE)
INFO [21:01:01.408] [mlr3] Running benchmark with 10 resampling iterations
INFO [21:01:01.448] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 7/10)
INFO [21:01:01.589] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 2/10)
INFO [21:01:01.663] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 10/10)
INFO [21:01:01.736] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 3/10)
INFO [21:01:01.811] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 6/10)
INFO [21:01:01.894] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 4/10)
INFO [21:01:01.966] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 5/10)
INFO [21:01:02.046] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 8/10)
INFO [21:01:02.118] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 9/10)
INFO [21:01:02.191] [mlr3] Applying learner 'classif.rpart' on task 'telee' (iter 1/10)
INFO [21:01:02.280] [mlr3] Finished benchmark
res$aggregate(list(msr("classif.ce")))
trees <- res$resample_result(1)
# Then, let's look at the tree from first CV iteration, for example:
tree1 <- trees$learners[[1]]
# This is a fitted rpart object, so we can look at the model within
tree1_rpart <- tree1$model
# If you look in the rpart package documentation, it tells us how to plot the
# tree that was fitted
plot(tree1_rpart, compress = TRUE)
text(tree1_rpart, use.n = TRUE)

library(mlr)
Loading required package: ParamHelpers
Warning message: 'mlr' is in 'maintenance-only' mode since July 2019. Future development will only happen in 'mlr3' (<https://mlr3.mlr-org.com>). Due to the focus
on 'mlr3' there might be uncaught bugs meanwhile in {mlr} - please consider switching.
Attaching package: ‘mlr’
The following objects are masked _by_ ‘.GlobalEnv’:
cv5, hout
The following objects are masked from ‘package:mlr3’:
benchmark, resample
The following objects are masked from ‘package:mlr3verse’:
benchmark, resample
library(randomForest)
randomForest 4.6-14
Type rfNews() to see new features/changes/bug fixes.
Attaching package: ‘randomForest’
The following object is masked from ‘package:gridExtra’:
combine
The following object is masked from ‘package:dplyr’:
combine
The following object is masked from ‘package:ggplot2’:
margin
lrns = list(
makeLearner('classif.rpart', predict.type="prob"),
"classif.randomForest"
)
regr.task = makeClassifTask(id = "telee", data = na.omit(tele), target = "Churn")
Provided data is not a pure data.frame but from class tbl_df, hence it will be converted.
rin = makeResampleDesc(method = "Holdout")
lc = generateLearningCurveData(learners = lrns, task = regr.task,
percs = seq(0.1, 1, by = 0.1), measures = acc,
resampling = rin, show.info = FALSE)
plotLearningCurve(lc)

lrns = list(
makeLearner('classif.rpart', predict.type="prob"),
makeLearner("classif.randomForest",ntree = 2,mtry = 1, nodesize=2)
)
rin2 = makeResampleDesc(method = "Holdout", predict = "both")
lc2 = generateLearningCurveData(learners = lrns, task = regr.task,
percs = seq(0.1, 1, by = 0.1),
measures = list(acc, setAggregation(acc, train.mean)), resampling = rin2,
show.info = FALSE)
plotLearningCurve(lc2, facet = "learner")

---
title: "R Notebook"
output:
  html_document:
    df_print: paged
  html_notebook: default
  pdf_document: default
---
```{r}


# Load the telecom churn data set into `tele`.
# NOTE(review): the path below is absolute and machine-specific; the notebook
# only runs where this exact file exists.
library(readr)
tele <- read_csv("C:\\Users\\Rober\\OneDrive\\Documents\\Uni Stuff\\NOTEBOOKS\\Class\\telecom.csv")

# View() opens a GUI data viewer. Only call it in an interactive session so
# that knitting the notebook (html/pdf output) does not depend on a display.
if (interactive()) {
  View(tele)
}
```
```{r}
library("tidyverse")
library("ggplot2")
library("magrittr") 
library("dplyr") 
library("data.table")
library("mlr3verse")
library("paradox")
library("mlr3tuning")
```

```{R}
# Data exploration: print a per-column summary of `tele`.
library("skimr")
skim(tele)
```
```{r}
#Data manipulation
# Attach the packages used by the data-manipulation and plotting code below.
# NOTE(review): plyr is attached here after dplyr was attached in an earlier
# chunk, so any plyr function sharing a name with a dplyr function masks it.
# Confirm the effect on later calls before changing this order.
library("plyr")
library("FSA")
library("corrplot")
library("gridExtra")
library("GGally")
# Keep only the rows of `tele` that have no missing value in any column.
tele <- tele[complete.cases(tele), ]

#' Bucket customer tenure (in months) into labelled groups.
#'
#' Vectorised: works on a single value (as when called through `sapply()`)
#' or on a whole column at once.
#'
#' @param tenure Numeric vector of tenures in months.
#' @return Character vector the same length as `tenure`, each element one of
#'   "0-12 Month", "12-24 Month", "24-48 Month", "48-60 Month" or
#'   "> 60 Month". Missing or negative tenures give `NA`.
group_tenure <- function(tenure) {
  # Buckets are closed on the right: (12, 24], (24, 48], (48, 60], (60, Inf].
  # include.lowest = TRUE makes the first bucket [0, 12] so a tenure of 0 is
  # labelled rather than dropped.
  buckets <- cut(
    tenure,
    breaks = c(0, 12, 24, 48, 60, Inf),
    labels = c(
      "0-12 Month", "12-24 Month", "24-48 Month", "48-60 Month", "> 60 Month"
    ),
    include.lowest = TRUE
  )
  as.character(buckets)
}
# Label every customer with a tenure bucket and store the result as a factor.
# vapply() is used instead of sapply() so the result is always a character
# vector: sapply() silently returns a list when the input is empty or when
# group_tenure() returns something other than a single string.
tele$tenure_group <- as.factor(
  vapply(tele$tenure, group_tenure, character(1), USE.NAMES = FALSE)
)

# Drop the raw tenure and total-charges columns from the data.
tele$tenure <- NULL
tele$TotalCharges <- NULL

#view(tele)

# Build a horizontal bar chart showing, for one categorical column, the
# percentage of rows that fall in each level.
#   data   - data frame holding the column
#   column - name of the column, as a string
#   title  - text used for both the plot title and the category-axis label
plot_percentage_bar <- function(data, column, title) {
  ggplot(data, aes(x = .data[[column]])) +
    ggtitle(title) +
    xlab(title) +
    # after_stat() replaces the deprecated ..count.. notation.
    geom_bar(aes(y = after_stat(100 * count / sum(count))), width = 0.5) +
    ylab("Percentage") +
    coord_flip() +
    theme_minimal()
}

p7 <- plot_percentage_bar(tele, "InternetService", "Internet Service")
p8 <- plot_percentage_bar(tele, "OnlineSecurity", "Online Security")
grid.arrange(p7, p8, ncol = 2)

p13 <- plot_percentage_bar(tele, "StreamingMovies", "Streaming Movies")
p14 <- plot_percentage_bar(tele, "Contract", "Contract")
p15 <- plot_percentage_bar(tele, "PaperlessBilling", "Paperless Billing")
p16 <- plot_percentage_bar(tele, "PaymentMethod", "Payment Method")
p17 <- plot_percentage_bar(tele, "tenure_group", "Tenure Group")
grid.arrange(p13, p14, p15, p16, p17, ncol = 2)

# Pairwise plots of StreamingTV, Partner and Churn, coloured by Churn.
ggpairs(
  tele %>% select(StreamingTV, Partner, Churn),
  mapping = aes(color = Churn)
)

# Print the MonthlyCharges column.
tele[["MonthlyCharges"]]


```









```{R}
# 2/3) Base model analysis
# Benchmark the untuned learners against each other on one holdout split.
# All learners predict class probabilities.

# Decision tree.
lrn_cart <- lrn("classif.rpart", predict_type = "prob")

# glmnet with alpha = 1, preceded by an "encode" step.
lrn_glm <- lrn("classif.glmnet", predict_type = "prob", alpha = 1)
pl_glm <- po("encode") %>>% po(lrn_glm)

# Featureless learner.
lrn_feat <- lrn("classif.featureless", predict_type = "prob")

# lrn_lda <- lrn("classif.lda", predict_type = "prob")
# pl_lda <- po(lrn_lda)

# ranger, wrapped as a PipeOp.
lrn_ranger <- lrn("classif.ranger", predict_type = "prob")
pl_ranger <- po(lrn_ranger)

# xgboost, preceded by an "encode" step.
lrn_xgboost <- lrn(
  "classif.xgboost",
  predict_type = "prob",
  eval_metric = "error"
)
pl_xgb <- po("encode") %>>% po(lrn_xgboost)

# Turn every character column of `tele` into a factor before building the task.
tele <- tele %>% mutate_if(sapply(tele, is.character), as.factor)

# tele$Churn <- factor(tele$Churn, levels=c(0, 1))

# Classification task: predict Churn, with "Yes" as the positive class.
credit_task <- TaskClassif$new(
  id = "telee",
  backend = tele,
  target = "Churn",
  positive = "Yes"
)

# NOTE: despite its name, cv5 is a single holdout split here.
cv5 <- rsmp("holdout")
cv5$instantiate(credit_task)

# Run all five learners on the same instantiated split and keep the models.
res <- benchmark(
  data.table(
    task = list(credit_task),
    learner = list(lrn_cart, pl_glm, lrn_feat, pl_ranger, pl_xgb),
    resampling = list(cv5)
  ),
  store_models = TRUE
)

# Classification error, false positive rate and false negative rate.
res$aggregate(list(
  msr("classif.ce"),
  msr("classif.fpr"),
  msr("classif.fnr")
))


```

```{R}
# 2/3) Check the hyperparameters found through tuning (the tuning code is in
# the cells further down).
# Author's note: only xgboost was tuned, as glmnet has a model called
# cv_glmnet which tunes the regularisation parameter for us.
# NOTE(review): the learners compared in this cell are rpart and ranger;
# confirm the note above is still accurate.

# Decision tree: default settings vs. cp = 0.013.
lrn_cart <- lrn("classif.rpart", predict_type = "prob")
lrn_rcart <- lrn("classif.rpart", predict_type = "prob", cp = 0.013)

# ranger: default settings vs. num.trees = 248 and max.depth = 16.
# Both are also wrapped as PipeOps.
lrn_ranger <- lrn("classif.ranger", predict_type = "prob")
pl_ranger <- po(lrn_ranger)

lrn_rranger <- lrn(
  "classif.ranger",
  predict_type = "prob",
  num.trees = 248,
  max.depth = 16
)
pl_rranger <- po(lrn_rranger)

# Classification task: predict Churn, with "Yes" as the positive class.
credit_task <- TaskClassif$new(
  id = "telee",
  backend = tele,
  target = "Churn",
  positive = "Yes"
)

# NOTE: despite its name, cv5 is a single holdout split here.
cv5 <- rsmp("holdout")
cv5$instantiate(credit_task)

# Run the four learners on the same instantiated split and keep the models.
res <- benchmark(
  data.table(
    task = list(credit_task),
    learner = list(lrn_cart, lrn_rcart, pl_ranger, pl_rranger),
    resampling = list(cv5)
  ),
  store_models = TRUE
)

# Error and false positive/negative rates, plus the confusion-matrix counts.
res$aggregate(list(
  msr("classif.ce"),
  msr("classif.fpr"),
  msr("classif.fnr"),
  msr("classif.fn"),
  msr("classif.fp"),
  msr("classif.tp"),
  msr("classif.tn")
))


```

```{R}

# The next 4 cells are the ROC curves.
# This cell benchmarks the default and tuned rpart/ranger learners under
# bootstrap resampling and plots the result.
library(mlr3viz)
library(precrec)

tasks <- credit_task
learner <- list(lrn_cart, lrn_rcart, lrn_ranger, lrn_rranger)
resampling <- rsmp("bootstrap")

# TODO:
# - try bootstrap on the rest
# - check I am using all the columns in the data
# - try larger grid values
# - try changing the FP trade-off values
# - look at the benchmarking bookmark

object <- benchmark(
  benchmark_grid(tasks, learner, resampling)
)

head(fortify(object))

autoplot(object)
```




```{R}

# ROC curve cell 1 of 4: rpart with default settings, resampled with cv5.
library(mlr3viz)
library(precrec)

tasks <- credit_task
learner <- list(lrn("classif.rpart", predict_type = "prob"))
resampling <- cv5

object <- benchmark(
  benchmark_grid(tasks, learner, resampling)
)

head(fortify(object))

# Default benchmark plot, then the ROC curve drawn from a deep clone.
autoplot(object)

autoplot(object$clone(deep = TRUE), type = "roc")
```
```{R}
# ROC curve cell: rpart with cp = 0.013, resampled with cv5.
library(mlr3viz)
library(precrec)

tasks <- credit_task
learner <- lrn("classif.rpart", predict_type = "prob", cp = 0.013)
resampling <- cv5

object <- benchmark(
  benchmark_grid(tasks, learner, resampling)
)

head(fortify(object))

# Default benchmark plot, then the ROC curve drawn from a deep clone.
autoplot(object)

autoplot(object$clone(deep = TRUE), type = "roc")
```


```{R}
# ROC curve cell: ranger with default settings, resampled with cv5.
library(mlr3viz)
library(precrec)

tasks <- credit_task
learner <- lrn("classif.ranger", predict_type = "prob")
resampling <- cv5

object <- benchmark(
  benchmark_grid(tasks, learner, resampling)
)

head(fortify(object))

# Default benchmark plot, then the ROC curve drawn from a deep clone.
autoplot(object)

autoplot(object$clone(deep = TRUE), type = "roc")
```


```{R}
# ROC curve cell: ranger with num.trees = 248 and max.depth = 16, resampled
# with cv5.
library(mlr3viz)
library(precrec)

tasks <- credit_task
learner <- lrn(
  "classif.ranger",
  predict_type = "prob",
  num.trees = 248,
  max.depth = 16
)
resampling <- cv5

object <- benchmark(
  benchmark_grid(tasks, learner, resampling)
)

head(fortify(object))

# Default benchmark plot, then the ROC curve drawn from a deep clone.
autoplot(object)

autoplot(object$clone(deep = TRUE), type = "roc")
```





































```{R}
# Plot of the cost-complexity penalty (cp) for the tree.
# rpart is fitted with xval = 10 under bootstrap resampling, keeping the
# fitted models so one of them can be passed to plotcp().
lrn_cart_cv <- lrn("classif.rpart", predict_type = "prob", xval = 10)
cv5 <- rsmp("bootstrap")
res_cart_cv <- resample(
  credit_task,
  lrn_cart_cv,
  cv5,
  store_models = TRUE
)

# Use the model from the 10th resampling iteration.
rpart::plotcp(res_cart_cv$learners[[10]]$model)
```



```{R}
lrn_ranger$param_set
```

```{R}

# Tuning the number of trees and the maximum depth of the ranger learner.
learner <- lrn("classif.ranger", predict_type = "prob")

# Search ranges: 200-500 trees, depth 2-30.
search_space <- ps(
  num.trees = p_int(lower = 200, upper = 500),
  max.depth = p_int(lower = 2, upper = 30)
)

# Each configuration is scored by accuracy on a holdout split.
hout <- rsmp("holdout")
measure <- msr("classif.acc")

# NOTE: despite the name, this terminator stops after 10 evaluations.
evals20 <- trm("evals", n_evals = 10)

# Task built on na.omit(tele), i.e. rows with missing values are removed here.
task <- TaskClassif$new(
  id = "telee",
  backend = na.omit(tele),
  target = "Churn",
  positive = "Yes"
)

instance <- TuningInstanceSingleCrit$new(
  task = task,
  learner = learner,
  resampling = hout,
  measure = measure,
  search_space = search_space,
  terminator = evals20
)
instance

# Grid search with resolution 250, stopped by the terminator above.
tuner <- tnr("grid_search", resolution = 250)
tuner$optimize(instance)
``` 

```{R}
lrn_cart$param_set
```

```{R}

# Tuning the tree, i.e. its complexity penalty cp.
learner <- lrn("classif.rpart", predict_type = "prob")

# Search range for cp: 0.0001 to 0.1.
search_space <- ps(
  cp = p_dbl(lower = 0.0001, upper = 0.1)
)

# tele = tele %>% mutate_if(sapply(tele, is.character), as.factor)

# Each configuration is scored by accuracy on a holdout split.
hout <- rsmp("holdout")
measure <- msr("classif.acc")

# NOTE: despite the name, this terminator stops after 10 evaluations.
evals20 <- trm("evals", n_evals = 10)

# Task built directly on `tele` (no na.omit() this time).
task <- TaskClassif$new(
  id = "telee",
  backend = tele,
  target = "Churn",
  positive = "Yes"
)

instance <- TuningInstanceSingleCrit$new(
  task = task,
  learner = learner,
  resampling = hout,
  measure = measure,
  search_space = search_space,
  terminator = evals20
)
instance

# Grid search with resolution 250, stopped by the terminator above.
tuner <- tnr("grid_search", resolution = 250)
tuner$optimize(instance)


```

```{R}

```


```{R}


# Optimising the classification threshold of rpart for the false negative
# rate (the tuning measure below is classif.fnr), then comparing the tuned
# learner against a plain rpart.
# `task` must already exist in the session when this cell runs.

# rpart with probability predictions, followed by a threshold step.
gr <- lrn("classif.rpart", predict_type = "prob") %>>% po("threshold")
learner <- GraphLearner$new(gr)

# Candidate thresholds lie between 0.36 and 0.64.
search_space <- ps(
  threshold.thresholds = p_dbl(lower = 0.36, upper = 0.64)
)

# Grid search, stopped after 10 evaluations.
terminator <- trm("evals", n_evals = 10)
tuner <- tnr("grid_search")

# The threshold is tuned on an inner holdout split.
at <- AutoTuner$new(
  learner = learner,
  resampling = rsmp("holdout"),
  measure = msr("classif.fnr"),
  search_space = search_space,
  terminator = terminator,
  tuner = tuner
)
at

# Argument names are spelled out in full (tasks/learners/resamplings) rather
# than relying on R's partial argument matching.
grid <- benchmark_grid(
  tasks = task,
  learners = list(at, lrn("classif.rpart")),
  resamplings = rsmp("cv", folds = 3)
)

# avoid console output from mlr3tuning
logger <- lgr::get_logger("bbotk")
logger$set_threshold("warn")

bmr <- benchmark(grid)
bmr$aggregate(msrs(c(
  "classif.ce",
  "classif.fpr",
  "classif.fnr",
  "classif.fn",
  "classif.fp",
  "classif.tp",
  "classif.tn"
)))




```



```{R}
# Tune the classification threshold of a ranger learner, then benchmark the
# tuned learner against a default ranger.
#
# The AutoTuner is scored with `classif.fnr` (false negative rate).
# NOTE(review): the original heading of this chunk said "false positive
# rate" while the measure is `classif.fnr` -- confirm which one is intended.

# Rebuild the task on complete cases only: na.omit() drops every row of
# `tele` that contains a missing value.
# NOTE(review): presumably done because ranger cannot cope with NAs -- confirm.
task <- TaskClassif$new(
  id = "telee",
  backend = na.omit(tele),
  target = "Churn",
  positive = "Yes"
)

# ranger with probability predictions, followed by a thresholding step.
gr <- lrn("classif.ranger", predict_type = "prob") %>>% po("threshold")
learner <- GraphLearner$new(gr)

# Candidate thresholds lie in [0.35, 0.65].
search_space <- ps(
  threshold.thresholds = p_dbl(lower = 0.35, upper = 0.65)
)

# Grid search, stopped after 10 evaluations.
terminator <- trm("evals", n_evals = 10)
tuner <- tnr("grid_search")

# Inner resampling: a holdout split is used to pick the threshold.
at <- AutoTuner$new(
  learner = learner,
  resampling = rsmp("holdout"),
  measure = msr("classif.fnr"),
  search_space = search_space,
  terminator = terminator,
  tuner = tuner
)
at

# Outer resampling: one holdout split comparing the tuned learner with
# plain ranger. Argument names are written in full (`tasks`, `learners`,
# `resamplings`) instead of relying on R's partial argument matching.
grid <- benchmark_grid(
  tasks = task,
  learners = list(at, lrn("classif.ranger")),
  resamplings = rsmp("holdout")
)

# Avoid console output from mlr3tuning while the benchmark runs.
logger <- lgr::get_logger("bbotk")
logger$set_threshold("warn")

bmr <- benchmark(grid)

# Error rate plus the confusion-matrix based rates and counts.
bmr$aggregate(msrs(c(
  "classif.ce",
  "classif.fpr",
  "classif.fnr",
  "classif.fn",
  "classif.fp",
  "classif.tp",
  "classif.tn"
)))



```
```{R}

# Cross-validate a CART on `tele`, report its classification error and plot
# the tree fitted in the first fold.

# Rows with missing values are kept here (no na.omit()).
credit_task <- TaskClassif$new(
  id = "telee",
  backend = tele,
  target = "Churn",
  positive = "Yes"
)

# Decision tree producing class probabilities.
lrn_cart <- lrn("classif.rpart", predict_type = "prob")

# NOTE(review): despite the name `cv5`, this is 10-fold cross-validation.
cv5 <- rsmp("cv", folds = 10)
# Fix the fold assignment on this task before benchmarking.
cv5$instantiate(credit_task)

# One-row benchmark design; models are stored so the fitted trees can be
# inspected afterwards.
res <- benchmark(
  data.table(
    task = list(credit_task),
    learner = list(lrn_cart),
    resampling = list(cv5)
  ),
  store_models = TRUE
)

res$aggregate(msrs("classif.ce"))

# Resample result of the single experiment, then the learner trained in the
# first CV iteration and the fitted rpart object it holds.
trees <- res$resample_result(1)
tree1 <- trees$learners[[1]]
tree1_rpart <- tree1$model

# Draw the tree skeleton, then label the nodes (with observation counts).
plot(tree1_rpart, compress = TRUE)
text(tree1_rpart, use.n = TRUE)
```




```{R}
# Learning curves with the legacy mlr package: holdout accuracy of rpart and
# a random forest for training-set fractions from 10% to 100%.
library(mlr)
library(randomForest)

# The second learner is given by name only; mlr builds it with defaults.
lrns <- list(
  makeLearner("classif.rpart", predict.type = "prob"),
  "classif.randomForest"
)

# NOTE(review): named `regr.task` although it is a classification task; the
# name is kept because the next chunk reuses it.
# Complete cases only (na.omit). `positive = "Yes"` matches the mlr3 tasks
# in this notebook; without it mlr takes the first factor level of the
# target as the positive class.
regr.task <- makeClassifTask(
  id = "telee",
  data = na.omit(tele),
  target = "Churn",
  positive = "Yes"
)

rin <- makeResampleDesc(method = "Holdout")
lc <- generateLearningCurveData(
  learners = lrns,
  task = regr.task,
  percs = seq(0.1, 1, by = 0.1),
  measures = acc,
  resampling = rin,
  show.info = FALSE
)
plotLearningCurve(lc)
```
```{R}
# Learning curves showing accuracy on both the training and the test part of
# a holdout split, one panel per learner.
# Relies on `regr.task` and the mlr package attached in the previous chunk.
lrns <- list(
  makeLearner("classif.rpart", predict.type = "prob"),
  # NOTE(review): very small forest (2 trees, mtry = 1, nodesize = 2) --
  # confirm this is deliberate.
  makeLearner("classif.randomForest", ntree = 2, mtry = 1, nodesize = 2)
)

# predict = "both" requests predictions on the training and the test set,
# which the train.mean aggregation below needs.
rin2 <- makeResampleDesc(method = "Holdout", predict = "both")

lc2 <- generateLearningCurveData(
  learners = lrns,
  task = regr.task,
  percs = seq(0.1, 1, by = 0.1),
  measures = list(acc, setAggregation(acc, train.mean)),
  resampling = rin2,
  show.info = FALSE
)
plotLearningCurve(lc2, facet = "learner")
```



